"""
    @Author  ：思念 
    @File    ：5.豆瓣电影信息.py
    @Date    ：2024/12/5 20:21 
"""
import requests
from lxml import etree

# Page to scrape: the Douban "Top 250" movie listing.
url = "https://movie.douban.com/top250"

# Send a desktop-browser User-Agent instead of the python-requests default.
# NOTE(review): presumably the site rejects the default client UA - confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
}

# A timeout keeps the script from hanging forever on a stalled connection
# (requests has no default timeout).
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# producing empty XPath results further down.
response.raise_for_status()
res = response.text

# Parse the HTML text into an element tree that the XPath queries run against.
tree = etree.HTML(res)
# Poster image URLs and primary titles; the two result lists are paired by
# position when building the per-movie records below.
img_url_list = tree.xpath("//div[@class='pic']/a/img/@src")
title_list = tree.xpath("//div[@class='hd']/a/span[1]/text()")

# Collect one record per movie. The loop variables deliberately use names
# different from the lists: reusing the list names as loop targets would
# rebind them to single strings once the loop finishes.
movie_list = []
for img_url, title in zip(img_url_list, title_list):
    movie_info = {"img_url": img_url, "title": title}
    # Keep every record instead of discarding it at the end of the iteration.
    movie_list.append(movie_info)

# last(): keep only the final <li> under each <ol>, then read its first
# title span.
last_title_query = "//ol/li[last()]//div[@class='hd']/a/span[1]/text()"
title_last = tree.xpath(last_title_query)
print(title_last)

# position()>1: keep every <li> except the first one.
skip_first_query = "//ol/li[position()>1]//div[@class='hd']/a/span[1]/text()"
unless_first_tile = tree.xpath(skip_first_query)
print(unless_first_tile)

# text()='...': keep only the span whose text equals the given title
# (The Shawshank Redemption).
exact_title_query = "//ol/li//div[@class='hd']/a/span[text()='肖申克的救赎']/text()"
get_title = tree.xpath(exact_title_query)
print(get_title)

